## List of Species
We use a small test data set of species names. Below we read in the data.
# Load the species list from the test CSV file.
List_of_Species <- read.csv(file = 'D:/TEST.csv')
# Preview the first rows to confirm the file was read as expected.
head(x = List_of_Species)
## Taxon Author
## 1 Spathacanthus hahnianus Baill.
## 2 Spathacanthus parviflorus Leonard
## 3 Saurauia angustifolia Turcz.
## 4 Saurauia aspera Turcz.
## 5 Saurauia cana B.T.Keller & Breedlove
## 6 Saurauia comitis-rossei R.E.Schult.
## Using the GBIF API
We can query the GBIF API for information about each species and, at the same time, use that information to clean our results. (Each column is extracted from the data frame returned by the API in its own `mutate()` step, to show exactly which information we use for plotting later.)
# Query GBIF for every taxon and collect the occurrence fields used for plotting.
# Result `X`: one row per taxon with the columns Taxon, Latitude, Longitude,
# Year and Source; the last four are list-columns holding the values plucked
# from that taxon's occurrence search result.
X <- List_of_Species %>%
  set_names(c("Taxon", "Author")) %>% # set names for columns
  # look up the GBIF backbone species key for each taxon name
  mutate(keys = map(Taxon, function(x) name_backbone(name = x)$speciesKey)) %>%
  # Drop taxa for which no species key came back. A NULL key would otherwise be
  # handed to occ_search() as an empty taxonKey, so the records returned would
  # not be restricted to this taxon.
  filter(lengths(keys) > 0) %>%
  # fetch up to 200 occurrence records per species key (taxonKey named explicitly)
  mutate(rgbif_pull = map(keys, function(k) occ_search(taxonKey = k, limit = 200))) %>%
  mutate(Latitude = map(rgbif_pull, pluck, 'data', 'decimalLatitude')) %>% # pluck latitude
  mutate(Longitude = map(rgbif_pull, pluck, 'data', 'decimalLongitude')) %>% # pluck longitude
  mutate(Year = map(rgbif_pull, pluck, 'data', 'year')) %>% # pluck year
  mutate(Source = map(rgbif_pull, pluck, 'data', 'datasetName')) %>% # pluck source
  # keep only the taxon name and the plucked occurrence fields
  select(Taxon, Latitude, Longitude, Year, Source) %>%
  # Keep only taxa for which every field was returned. pluck() yields NULL for
  # a missing field, so test for NULL directly instead of comparing the
  # list-column against the string 'NULL'.
  filter(
    !map_lgl(Latitude, is.null),
    !map_lgl(Longitude, is.null),
    !map_lgl(Source, is.null),
    !map_lgl(Year, is.null)
  )
# Flatten the per-taxon list-columns of `X` into a single character matrix `P`
# with one row per occurrence record, a format that is easy to plot.
# Columns: O (taxon), V2 (latitude), V3 (longitude), V4 (year), V5 (source).
# The columns are named explicitly so later steps do not depend on names
# being auto-generated for unnamed matrix columns.
P_columns <- c('O', 'V2', 'V3', 'V4', 'V5')

# Build one matrix per taxon. seq_len() makes this a no-op when `X` has no
# rows, where 1:nrow(X) would iterate over c(1, 0) and fail.
P_parts <- lapply(seq_len(nrow(X)), function(i) {
  Y <- cbind(X$Latitude[[i]], X$Longitude[[i]])
  Z <- cbind(X$Year[[i]], X$Source[[i]])
  O <- rep(X$Taxon[i], each = nrow(Y)) # repeat the taxon name once per record
  Q <- cbind(O, Y, Z)
  colnames(Q) <- P_columns
  Q
})

# Bind all pieces in a single call rather than growing `P` inside a loop.
P <- if (length(P_parts) > 0) {
  do.call(rbind, P_parts)
} else {
  # no taxa left after cleaning: return an empty matrix with the same columns
  matrix(
    character(0),
    nrow = 0,
    ncol = length(P_columns),
    dimnames = list(NULL, P_columns)
  )
}
# Produce a tibble with one row per occurrence record and the columns
# Species (factor), Lat (double), Lon (double), Year (character) and
# Source (character).
# The five columns of `P` are named by position before the tibble is built,
# so this step does not rely on names (V2, V3, ...) being generated for
# unnamed matrix columns.
ALL_DATA <- P %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  set_names(c("Species", "Lat", "Lon", "Year", "Source")) %>%
  as_tibble() %>%
  na.omit() %>% # drop records with any missing field
  mutate(Lat = as.double(Lat)) %>%
  mutate(Lon = as.double(Lon)) %>%
  mutate(Species = as.factor(Species))
head(ALL_DATA)
## # A tibble: 6 x 5
## Species Lat Lon Year Source
## <fct> <dbl> <dbl> <chr> <chr>
## 1 Spathacanthus hah~ 16.9 -93.4 2021 iNaturalist research-grade observations
## 2 Spathacanthus hah~ 20.0 -97.7 2016 NMNH Extant Biology
## 3 Spathacanthus hah~ 19.9 -97.5 2016 NMNH Extant Biology
## 4 Spathacanthus hah~ 20.0 -97.5 2015 NMNH Extant Biology
## 5 Spathacanthus hah~ 15.6 -92.8 2014 Fortalecimiento de las colecciones de EC~
## 6 Spathacanthus hah~ 16.8 -93.7 2014 iNaturalist research-grade observations
Now we have a data frame that contains coordinate data for all the species we care about, together with the year and source of each occurrence.
# Base map: medium-scale country polygons requested as an sf object.
world <- st_as_sf(ne_countries(scale = 'medium', returnclass = 'sf'))

# Aesthetic mapping for the occurrence points.
# NOTE(review): `label = Source` is presumably mapped so that the source shows
# up in the interactive plotly tooltip - confirm.
occurrence_aes <- aes(
  x = Lon,
  y = Lat,
  fill = Species,
  color = Year,
  label = Source
)

# One small point per occurrence record.
occurrence_layer <- geom_point(
  data = ALL_DATA,
  mapping = occurrence_aes,
  size = 0.5
)

# Draw the points on top of the world map and hide the legend.
test_plot <- ggplot() +
  geom_sf(data = world) +
  occurrence_layer +
  theme(legend.position = 'none')

# Convert the static ggplot into an interactive plotly widget.
ggplotly(test_plot)